// ---- session settings -------------------------------------------------------
// Start from an empty session, then raise the variable/matrix limits and turn
// off output paging (all three settings are stored permanently).
clear all
set more off, permanently
set maxvar  32000, permanently
set matsize 11000, permanently

// Fixed seed and number of replications used by every bootstrap call below.
set seed 23
global bootstraps "1000"

// ---- directory roots --------------------------------------------------------
// Each root is read from the operating-system environment variable of the
// same name.
foreach root in klmshare projects klmperry storageb {
	global `root' : env `root'
}

// ---- derived paths ----------------------------------------------------------
global perrydatas      "${klmperry}/CBA/data/perry/raw"
global perrydata       "${klmperry}/CBA/data/perry/clean"
global masterinter     "${klmshare}/CurrentRAs/FredB/test"
global output          "${projects}/ece_parenting/tex_files/other"
global dataperry       "${klmperry}/PerryPreschool/Data/Perry_PARI_and_Other_Data/PARI/DATA_PARI/"
global abcjpeanalysis  "${klmshare}/Data_Central/Abecedarian/data/ABC-CARE/extensions/cba-iv/"

// NOTE: dataihdp is assigned twice; the second assignment is the one in effect
// for the rest of the script.
global dataihdp        "${projects}/ece_parenting/data"
global dataihdp        "${storageb}/dc_data/"

// Move to the IHDP data directory and load the analysis file.
// The path is quoted so that a directory name containing spaces reaches cd as
// a single argument. Later relative-path merges rely on this working directory.
cd  "$dataihdp"
use ihdp_data, clear

// Work with lower-case variable names throughout.
rename _all, lower

// Rescale bw by 1/1000 (presumably grams to kilograms -- confirm units).
replace   bw = bw/1000

// bwg is used as a bootstrap stratum later; convert it to numeric if it is
// stored as a string.
destring bwg , replace 

// impute iq
// Step 1: cross-fill the two IQ measures from one another.
replace stndscor = iqcage   if stndscor == .
replace iqcage   = stndscor if iqcage   == .

// Step 2: where both are still missing, fall back on mdi_24cor.
// Once stndscor has been filled on a row, that row no longer satisfies the
// condition of the second statement; the cross-fill in step 3 copies the value
// over to iqcage instead.
replace stndscor = mdi_24cor if stndscor == . & iqcage == .
replace iqcage   = mdi_24cor if stndscor == . & iqcage == .

// Step 3: cross-fill again so both measures carry the fallback value.
replace stndscor = iqcage   if stndscor == .
replace iqcage   = stndscor if iqcage   == .

// list relevant variables
global baseline_child     "twin sex bw anga black hispanic"
global baseline_mother    "mage meduc works married"
global baseline_household "welfare tot_siblings_natural employed_adult"
global baseline_economy   "employment medinc gpc"
global outputs            "iqcage stndscor"
global inputs             "cum_avg_daycare_36m_sum"

// The regressions below are run only for their e(sample) flag, which is 1 on
// rows where every listed variable is non-missing.

// sample0: all child baseline characteristics observed
regress $baseline_child
generate sample0 = e(sample)

// sample1: baseline characteristics, outputs and inputs all observed
regress $baseline_child $baseline_mother $baseline_household $baseline_economy $outputs $inputs
generate sample1 = e(sample)

// parenting latents, ages 1 and 3
// Standardize every subscale within sample1 (mean 0, sd 1 in that sample).
foreach scale of varlist alt_subscale_1_12-alt_subscale_6_12 alt_subscale_1_36-alt_subscale_8_36 {
	summarize `scale' if sample1 == 1
	generate  `scale'_std = (`scale' - r(mean))/r(sd) if sample1 == 1
}

// Measurement paths: one latent factor X per age, intercepts constrained to 0.
local paths12
forvalues j = 1/6 {
	local paths12 `paths12' (_cons@0 X -> alt_subscale_`j'_12_std)
}
local paths36
forvalues j = 1/8 {
	local paths36 `paths36' (_cons@0 X -> alt_subscale_`j'_36_std)
}

// Error covariances freed in the 36-month model, listed as subscale pairs.
local ecov36
foreach pair in "1 2" "3 4" "5 6" "7 8" "4 7" "2 7" {
	local a : word 1 of `pair'
	local b : word 2 of `pair'
	local ecov36 `ecov36' cov(e.alt_subscale_`a'_36_std*e.alt_subscale_`b'_36_std)
}

// 12-month factor
sem `paths12' if sample1 == 1, method(adf)
predict parenting_age1 if sample1 == 1, latent

// 36-month factor
sem `paths36' if sample1 == 1, `ecov36' method(adf)
predict parenting_age3 if sample1 == 1, latent

// Row mean of the two predicted factors.
egen parenting_ages13 = rowmean(parenting_age1 parenting_age3) if sample1 == 1

// Inputs used from here on: cumulative daycare measure plus the parenting factor.
global inputs_std "cum_avg_daycare_36m_sum parenting_ages13"

// Trim the file to identifiers, strata, sample flags and analysis variables.
keep ihdp site tg bwg sample* ///
	$baseline_child $baseline_mother $baseline_household $baseline_economy ///
	$inputs_std $outputs ppvtstd wasifsiq

// merge in ages 5 and 8
// Only children found in both files are retained; no _merge variable is left behind.
merge 1:1 ihdp using iqchildhoodihdp, keep(match) nogenerate

// impute iq
// Each pair of scores is cross-filled: the first takes the second's value
// where it is missing, then the second takes the first's.
local firsts  kidppvt5 ppvtstd8 ppvtstd
local seconds wippsif5 fsiq8    wasifsiq
forvalues k = 1/3 {
	local a : word `k' of `firsts'
	local b : word `k' of `seconds'
	replace `a' = `b' if `a' == .
	replace `b' = `a' if `b' == .
}

// sample2 starts as the rows with all six scores observed (regression is run
// only for its e(sample) flag).
regress fsiq8 ppvtstd8 kidppvt5 wippsif5 wasifsiq ppvtstd
generate sample2 = e(sample)

// outcomes
// Attach the adult file, retaining matched children only and leaving no
// _merge variable behind.
merge 1:1 ihdp using adultihdp, keep(match) nogenerate

// construct "positive" outcomes
// Flip each 0/1 indicator (x -> 1 - x) so that 1 denotes the favourable state.
foreach flip of varlist idle18y sch_eversped18y sch_math18y sch_read18y sch_thrp18y teen18y {
	replace `flip' = 1 - `flip'
}

// smoke18 = 1 when cigs18y is exactly 0, 0 when it is positive, and missing
// otherwise (negative or missing cigs18y).
// !missing() is used instead of "!= ." because extended missing values
// (.a-.z) compare greater than any number and are not equal to ".", so they
// would otherwise be coded as smokers.
generate smoke18 = (cigs18y == 0) if cigs18y >= 0 & !missing(cigs18y)

// absence18 = 1 when sch_dabs18y is below 5, 0 when it is 5 or more, and
// missing when sch_dabs18y is missing (any missing code, same reasoning as above).
// NOTE(review): absence18 does not appear in the ed18/beh18 outcome lists
// defined further down in this file -- confirm whether that is intended.
generate absence18 = (sch_dabs18y < 5) if !missing(sch_dabs18y)

// marking missing
// Outcome lists for the education and behaviour indices, and the number of
// those outcomes that are missing on each row.
global ed18  "sch_eversped18y sch_math18y sch_read18y sch_test18y"
global beh18 "smoke18 idle18y sch_thrp18y teen18y"
egen missing = rowmiss($ed18 $beh18)

// average
// Indices are formed only for rows with every component observed; the overall
// index is the mean of the two sub-indices.
egen avg_outcome_educ18 = rowmean($ed18)                                 if missing == 0
egen avg_outcome_beh18  = rowmean($beh18)                                if missing == 0
egen avg_outcome18      = rowmean(avg_outcome_educ18 avg_outcome_beh18)  if missing == 0

// define sample
// sample2 is switched off for rows without the overall index (not in the
// e(sample) of the constant-only regression) and for rows outside sample1.
regress avg_outcome18 if sample2 == 1
replace sample2 = 0 if e(sample) == 0 | sample1 == 0

// standardize by sample
// For each sample s = 0, 1, 2 create p<s> (parenting) and c<s> (daycare),
// scaled with the mean and sd of the tg == 0 rows of that sample.
// r(mean) and r(sd) from summarize stay available across the replace calls.
forvalues s = 0/2 {
	// parenting: z-score against the tg == 0 rows, then shifted by 100
	generate  p`s' = parenting_ages13 if sample`s' == 1
	summarize p`s' if tg == 0 & sample`s' == 1
	replace   p`s' = (p`s' - r(mean))/r(sd) if sample`s' == 1
	replace   p`s' = p`s' + 100             if sample`s' == 1

	// daycare: z-score against the tg == 0 rows, then shifted back by their mean
	generate  c`s' = cum_avg_daycare_36m_sum if sample`s' == 1
	summarize c`s' if tg == 0 & sample`s' == 1
	replace   c`s' = (c`s' - r(mean))/r(sd) if sample`s' == 1
	replace   c`s' = c`s' + r(mean)         if sample`s' == 1
}

// describe by sample
// "if" clauses selecting each group; the sample restriction is appended to
// these with "&" where they are used.
global sample_all        "if twin != ."
global sample_singletons "if twin == 0"
global sample_twins      "if twin == 1"

// Baseline variables tabulated per group: twin is only reported for the
// pooled group.
local shared sex bw anga black hispanic
global baseline_all        twin `shared'
global baseline_singletons `shared'
global baseline_twins      `shared'
 
// Treatment/control contrasts by group and sample.
//
// The same bootstrap-and-store sequence is needed for baseline variables,
// inputs and the age-three IQ output, so it lives in one helper program.
// The helper is called in exactly the order the contrasts are tabulated
// (baseline for every group, then inputs for every group, then output), so
// the bootstrap draws follow the same random-number sequence throughout.
//
// tg_contrast_rows tag grp dep0 dep1 dep2
//   tag   label used in the stored matrix names
//   grp   group name (all / singletons / twins); selects ${sample_<grp>} and
//         names the matrix that accumulates the rows
//   dep0-dep2  dependent variable to use in sample0, sample1, sample2
//
// For each sample s it regresses the dependent variable on tg with a
// stratified bootstrap and stores 1x1 matrices named <x><s><tag>_g<grp>:
//   c  constant (second element of e(b)),   b  coefficient on tg (first element)
//   se bootstrap standard error of b,       t  |b/se|
//   p  two-sided p-value from the standard normal
//   df e(N) - e(rank),  n  e(N)   (stored; not read elsewhere in this file)
// It then appends a 2x6 block to matrix <grp>: first row (c, b) per sample,
// second row (., p) per sample.
capture program drop tg_contrast_rows
program define tg_contrast_rows
	args tag grp dep0 dep1 dep2
	forvalues s = 0/2 {
		bootstrap, strata(bwg site tg) reps($bootstraps) : reg `dep`s'' tg ${sample_`grp'} & sample`s' == 1
		matrix  c`s'`tag'_g`grp' = e(b)[1,2]
		matrix  b`s'`tag'_g`grp' = e(b)[1,1]
		matrix se`s'`tag'_g`grp' = sqrt(e(V)[1,1])
		matrix  t`s'`tag'_g`grp' = abs(b`s'`tag'_g`grp'[1,1]/se`s'`tag'_g`grp'[1,1])
		matrix df`s'`tag'_g`grp' = e(N) - e(rank)
		matrix  p`s'`tag'_g`grp' = 2*(1 - normal(t`s'`tag'_g`grp'[1,1]))
		matrix  n`s'`tag'_g`grp' = e(N)
	}
	matrix `tag'_`grp' = [ [c0`tag'_g`grp' \ .],[b0`tag'_g`grp' \ p0`tag'_g`grp'], ///
	                       [c1`tag'_g`grp' \ .],[b1`tag'_g`grp' \ p1`tag'_g`grp'], ///
	                       [c2`tag'_g`grp' \ .],[b2`tag'_g`grp' \ p2`tag'_g`grp'] ]
	matrix `grp' = [`grp' \ `tag'_`grp']
end

// baseline
foreach group in all singletons twins {
	// placeholder first row so rows can be appended; removed after the last section
	matrix `group' = J(1,6,.) 
	foreach var of varlist ${baseline_`group'} {
		tg_contrast_rows `var' `group' `var' `var' `var'
	}
}
// inputs
// c<s> / p<s> are the sample-specific standardized daycare and parenting variables
foreach group in all singletons twins {
	foreach var in c p {
		tg_contrast_rows `var' `group' `var'0 `var'1 `var'2
	}
}
// output
foreach group in all singletons twins {
	foreach var of varlist iqcage {
		tg_contrast_rows `var' `group' `var' `var' `var'
	}
	// drop the placeholder first row
	matrix `group' = `group'[2...,1...]
}

program drop tg_contrast_rows
		
// Stack the three group matrices into one 64-row layout, leaving empty rows
// where the table header (7 rows) and the panel titles will be written.
matrix all = [ J(7,6,.)  \ all[1..12,1...] \ J(1,6,.) \ all[13...,1...] ///
	\ J(2,6,.) \ singletons[1..10,1...] \ J(1,6,.) \ singletons[11...,1...] ///
	\ J(2,6,.) \ twins[1..10,1...] \ J(1,6,.) \ twins[11...,1...] ]

// Replace the data in memory with the matrix (columns become all1-all6).
clear
svmat all

// clear for base sample
// In the inputs/output rows of each panel the first-sample columns are blanked:
// all1 on the estimate rows, all2 on both the estimate and the p-value rows.
replace all1 = . if inlist(_n, 21, 23, 25, 40, 42, 44, 59, 61, 63)
replace all2 = . if inrange(_n, 21, 26) | inrange(_n, 40, 45) | inrange(_n, 59, 64)

// row counter used by all the formatting steps that follow
generate n = _n
// format
// One string column after each numeric column holds the LaTeX separator that
// follows that cell ("&" by default, empty on row 1).
forvalues k = 1/6 {
	generate all`k'and = "&" if n != 1
}

// header
// all1_0 is the row-label column and all0and the separator right after it.
generate all0and = "&" if n != 1
generate all1_0  = "\begin{tabular}{ l cccccc} \toprule" if n == 1

// rows 2, 4 and 5 carry a blank label
replace all1_0 = " " if inlist(n, 2, 4, 5)

// row 2: column numbers (1)-(6)
forvalues k = 0/5 {
	local colno = `k' + 1
	replace all`k'and = " & \multicolumn{1}{c}{(`colno')}" if n == 2
}

// row 3: sample titles, each spanning two columns
replace all1_0  = " & "                                                 if n == 3
replace all0and = "  \multicolumn{2}{c}{Baseline Observed for All}"     if n == 3
replace all2and = "  & \multicolumn{2}{c}{Main Analysis Sample}"        if n == 3
replace all4and = "  & \multicolumn{2}{c}{Longer-Term Analysis Sample}" if n == 3

// rows 4 and 5: Control / Delta sub-headers and the p-value legend
foreach k of numlist 0 2 4 {
	replace all`k'and = " & \multicolumn{1}{c}{Control}" if n == 4
	replace all`k'and = " & \multicolumn{1}{c}{ }"       if n == 5
}
foreach k of numlist 1 3 5 {
	replace all`k'and = "   "                                  if n == 3
	replace all`k'and = " & \multicolumn{1}{c}{$ \Delta $}"    if n == 4
	replace all`k'and = " & \multicolumn{1}{c}{$ [p $-value]}" if n == 5
}

// panel-title rows: every separator is blanked so the title cell stands alone
forvalues k = 0/5 {
	replace all`k'and = "   " if inlist(n, 6, 7, 20, 27, 28, 39, 46, 47, 58)
}

// panel a
replace all1_0 = "  \multicolumn{7}{l}{\textbf{\textit{Panel a}. All}}"                                                      if n == 6
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel a1}. Baseline}}"                                   if n == 7
// NOTE(review): the period sits inside \textit here, unlike panels b2 and c2.
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel a2.} Childcare, Parenting, Age-Three IQ}}"         if n == 20

// panel b
replace all1_0 = "  \multicolumn{7}{l}{\textbf{\textit{Panel b}. Singletons}}"                                               if n == 27
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel b1}. Baseline}}"                                   if n == 28
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel b2}. Childcare, Parenting, Age-Three IQ}}"         if n == 39

// panel c
replace all1_0 = "  \multicolumn{7}{l}{\textbf{\textit{Panel c}. Twins}}"                                                    if n == 46
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel c1}. Baseline}}"                                   if n == 47
replace all1_0 = "  \multicolumn{6}{l}{\hspace{2mm} \textbf{\textit{Panel c2}. Childcare, Parenting, Age-Three IQ}}"         if n == 58

// baseline row labels (Twin appears in panel a only)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Twin}"            if n == 8
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Male}"            if inlist(n, 10, 29, 48)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Birth Weight}"    if inlist(n, 12, 31, 50)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Gestational Age}" if inlist(n, 14, 33, 52)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Black}"           if inlist(n, 16, 35, 54)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Hispanic}"        if inlist(n, 18, 37, 56)

// inputs / output row labels
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Childcare}"       if inlist(n, 21, 40, 59)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Parenting}"       if inlist(n, 23, 42, 61)
replace all1_0 = "   \multicolumn{1}{l}{\hspace{4mm} Stanford-Binet}"  if inlist(n, 25, 44, 63)


// order
// Interleave columns as: label, separator, then value/separator pairs 1-6.
global orderlist all1_0 all0and
forvalues k = 1/6 {
	global orderlist $orderlist all`k' all`k'and
}
order $orderlist

// other formatting
// Default row terminator.
replace all6and = "\\   "

// p-value rows: wrap the p-value cells in square brackets.
// First sample (columns 1-2): baseline rows only.
replace all1and = "& [" if inlist(n, 9, 11, 13, 15, 17, 19, 30, 32, 34, 36, 38, 49, 51, 53, 55, 57)
replace all2and = "] &" if inlist(n, 9, 11, 13, 15, 17, 19, 30, 32, 34, 36, 38, 49, 51, 53, 55, 57)

// Second and third samples (columns 3-6): baseline and inputs/output rows.
foreach sep in all3and all5and {
	replace `sep' = "& [" if inlist(n, 9, 11, 13, 15, 17, 19, 22, 24, 26, 30, 32, 34, 36, 38, 41, 43, 45, 49, 51, 53, 55, 57, 60, 62, 64)
}
replace all4and = "] &"  if inlist(n, 9, 11, 13, 15, 17, 19, 22, 24, 26, 30, 32, 34, 36, 38, 41, 43, 45, 49, 51, 53, 55, 57, 60, 62, 64)
replace all6and = "] \\" if inlist(n, 9, 11, 13, 15, 17, 19, 22, 24, 26, 30, 32, 34, 36, 38, 41, 43, 45, 49, 51, 53, 55, 57, 60, 62, 64)

// Row-specific terminators; these override the defaults set above.
replace all6and = " \\ \midrule "                  if n == 2
replace all6and = "] \\ \midrule "                 if inlist(n, 26, 45)
replace all6and = " \\ \hdashline "                if inlist(n, 6, 27, 46)
replace all6and = "] \\ \hdashline "               if inlist(n, 19, 38, 57)
replace all6and = "] \\ \bottomrule \end{tabular}" if n == 64
replace all6and = " "                              if n == 1
replace all6and = " \\ \cmidrule(l{.15cm}r{.15cm}){2-3} \cmidrule(l{.15cm}r{.15cm}){4-5} \cmidrule(l{.15cm}r{.15cm}){6-7}" if n == 5

// put together in string
// all will hold the complete LaTeX source line for each row.
gen all = " "

// Turn the numeric cells into text with two decimals; missing cells become empty.
foreach num of numlist 1(1)6 {
	tostring all`num' , replace force format(%15.2fc)
	replace  all`num'  = "" if all`num'  == "."
}

// Column order: label, separator, value/separator pairs, then all and n.
// `num' no longer exists once its loop has ended, so the assembled line
// variable is named explicitly here rather than through all`num'.
global orderin all1_0 all0and
foreach num of numlist 1(1)6 {
	global orderin $orderin all`num' all`num'and 
}
global orderin $orderin all
order $orderin n

// Concatenate the cells left to right; the range all1_0-all6and relies on the
// column order set just above.
foreach var of varlist all1_0-all6and {	
	replace  all = all + `var'
}

// save in tex format
// The output path is quoted so a directory name containing spaces is passed
// to cd as a single argument.
keep all
cd "$output"
outsheet using samplesdescribe.tex, noquote nonames replace
